
%{

(* PASLEX.L: lexical analyzer for Pascal, adapted to TP Lex, 2-28-89 AG *)

%}

%{
(*
 * lex input file for pascal scanner
 *
 * extensions: to ways to spell "external" and "->" ok for "^".
 *
 *)
%}

%{

(* Note: Keywords are determined by scanning a keyword table, rather
   than including the keyword patterns in the Lex source which is done
   in the original version of this file. I prefer this method, because
   it makes the grammar itself more readable (handling case-insensitive
   keywords in Lex is quite cumbersome, e.g., you will have to write
   something like [Aa][Nn][Dd] to match the keyword `and'), and also
   produces a more (space-) efficient analyzer (184 states and 375
   transitions for the keyword pattern version, against only 40 states
   and 68 transitions for the keyword table version). *)

procedure commenteof;
  begin
    writeln('unexpected EOF inside comment at line ', yylineno);
  end(*commenteof*);

function upper(str : String) : String;
  (* converts str to uppercase *)
  var i : integer;
  begin
    for i := 1 to length(str) do
      str[i] := upCase(str[i]);
    upper := str
  end(*upper*);

function is_keyword(id : string; var token : integer) : boolean;
  (* checks whether id is Pascal keyword; if so, returns corresponding
     token number in token *)
  const
    id_len = 20;
  type
    Ident = string[id_len];
  const
    (* table of Pascal keywords: *)
    no_of_keywords = 39;
    keyword : array [1..no_of_keywords] of Ident = (
      'AND',       'ARRAY',     'BEGIN',    'CASE',
      'CONST',     'DIV',       'DO',       'DOWNTO',
      'ELSE',      'END',       'EXTERNAL', 'EXTERN',
      'FILE',      'FOR',       'FORWARD',  'FUNCTION',
      'GOTO',      'IF',        'IN',       'LABEL',
      'MOD',       'NIL',       'NOT',      'OF',
      'OR',        'OTHERWISE', 'PACKED',   'PROCEDURE',
      'PROGRAM',   'RECORD',    'REPEAT',   'SET',
      'THEN',      'TO',        'TYPE',     'UNTIL',
      'VAR',       'WHILE',     'WITH');
    keyword_token : array [1..no_of_keywords] of integer = (
      _AND,        _ARRAY,      _BEGIN,     _CASE,
      _CONST,      _DIV,        _DO,        _DOWNTO,
      _ELSE,       _END,        _EXTERNAL,  _EXTERNAL,
                                (* EXTERNAL: 2 spellings (see above)! *)
      _FILE,       _FOR,        _FORWARD,   _FUNCTION,
      _GOTO,       _IF,         _IN,        _LABEL,
      _MOD,        _NIL,        _NOT,       _OF,
      _OR,         _OTHERWISE,  _PACKED,    _PROCEDURE,
      _PROGRAM,    _RECORD,     _REPEAT,    _SET,
      _THEN,       _TO,         _TYPE,      _UNTIL,
      _VAR,        _WHILE,      _WITH);
  var m, n, k : integer;
  begin
    id := upper(id);
    (* binary search: *)
    m := 1; n := no_of_keywords;
    while m<=n do
      begin
        k := m+(n-m) div 2;
        if id=keyword[k] then
          begin
            is_keyword := true;
            token := keyword_token[k];
            exit
          end
        else if id>keyword[k] then
          m := k+1
        else
          n := k-1
      end;
    is_keyword := false
  end(*is_keyword*);

%}

NQUOTE    [^']

%%

%{

var c  : char;
    kw : integer;

%}

[a-zA-Z]([a-zA-Z0-9])*	if is_keyword(yytext, kw) then
                          return(kw)
                        else
                          return(IDENTIFIER);

":="			return(ASSIGNMENT);
'({NQUOTE}|'')+'	return(CHARACTER_STRING);
":"			return(COLON);
","			return(COMMA);
[0-9]+			return(DIGSEQ);
"."			return(DOT);
".."			return(DOTDOT);
"="			return(EQUAL);
">="			return(GE);
">"			return(GT);
"["			return(LBRAC);
"<="			return(LE);
"("			return(LPAREN);
"<"			return(LT);
"-"			return(MINUS);
"<>"			return(NOTEQUAL);
"+"			return(PLUS);
"]"			return(RBRAC);
[0-9]+"."[0-9]+		return(REALNUMBER);
")"			return(RPAREN);
";"			return(SEMICOLON);
"/"			return(SLASH);
"*"			return(STAR);
"**"			return(STARSTAR);
"->"            	|
"^"			return(UPARROW);

"(*"            	|
"{"			begin
                          repeat
			    c := get_char;
			    case c of
			      '}' : ;
			      '*' : begin
				      c := get_char;
				      if c=')' then exit else unget_char(c)
				    end;
                              #0 : begin
                                     commenteof;
                                     exit;
                                   end;
			    end;
			  until false
                        end;
[ \n\t\f]		;

.			return(ILLEGAL);
